Against the backdrop of recent economic growth in the City of Buffalo (before Covid-19), this project examines changes in single-family housing prices from 2017 to 2020. Buffalo has experienced rising real estate values in response to economic development projects, university expansions, and workforce development programs. This project compares changes in housing prices across neighborhoods and investigates possible factors that influence housing prices, such as the number of bathrooms, the number of bedrooms, and total living area.
Sources: Open Data Buffalo, Tax Assessment Data 2017 - 2020; Open Data Buffalo, Neighborhood Boundary Shapefile
library(tidyverse)
library(ggplot2)
library(ggmap)
library(maptools)
library(ggthemes)
library(rgeos)
library(broom)
library(plyr)
library(dplyr)
library(grid)
library(gridExtra)
library(reshape2)
library(scales)
library(sp)
library(sf)
library(rgdal)
library(RColorBrewer)
library(kableExtra)
library(leaflet)
knitr::opts_chunk$set(cache=TRUE)
# Property class codes treated as single-family housing in this analysis.
# Defined once and shared by both assessment-roll filters below.
SingleFam_propclass <- c("210", "215", "240", "241", "250", "270")

# 2017 - 2018 Buffalo Assessment Roll
Parcel17 <- read.csv(file = "https://raw.githubusercontent.com/geo511-2020/geo511-2020-project-erikwoyc/master/2017-2018_Assessment_Roll.csv")
# Keep only the parcels whose property class is in the single-family set.
Buffalo_17 <- filter(Parcel17, PROPERTY.CLASS %in% SingleFam_propclass)

# 2019 - 2020 Buffalo Assessment Roll
Parcel20 <- read.csv(file = "https://raw.githubusercontent.com/geo511-2020/geo511-2020-project-erikwoyc/master/2019-2020_Assessment_Roll.csv")
Buffalo_20 <- filter(Parcel20, PROPERTY.CLASS %in% SingleFam_propclass)

# Neighborhood boundaries (GeoJSON export from Open Data Buffalo)
Neighborhood_URL <- "https://data.buffalony.gov/api/geospatial/q9bk-zu3p?method=export&format=GeoJSON"
Buffalo_Neighborhoods <- st_read(dsn = Neighborhood_URL)
# sp-class copy of the boundaries, used later for the bounding box and
# the leaflet polygon overlay.
Buffalo_sp <- as_Spatial(Buffalo_Neighborhoods)
# Preview of the 2017 - 2018 assessment roll: the first ten parcels,
# restricted to the columns used in this analysis, rendered as a
# styled HTML table.
Parcel17 %>%
  select(
    PROPERTY.CLASS,
    PROP.CLASS.DESCRIPTION,
    TOTAL.VALUE,
    TOTAL.LIVING.AREA,
    NEIGHBORHOOD,
    LOCATION
  ) %>%
  head(n = 10) %>%
  kable(align = "c", digits = 2) %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed", "responsive")
  )
| PROPERTY.CLASS | PROP.CLASS.DESCRIPTION | TOTAL.VALUE | TOTAL.LIVING.AREA | NEIGHBORHOOD | LOCATION |
|---|---|---|---|---|---|
| 710 | MANUFACTURING & PROCESSING | 825000 | 0 | | |
| 350 | URBAN RENEWAL VACANT LAND | 6900 | 0 | | |
| 841 | MOTOR VEHICLE | 22440000 | 0 | Parkside | (42.93973696004812, -78.83864068992023) |
| 311 | RESIDENTIAL VACANT LAND | 400 | 0 | | |
| 220 | TWO FAMILY DWELLING | 54500 | 2302 | Elmwood Bidwell | (42.91848032397568, -78.86218043101054) |
| 482 | DOWNTOWN ROW TYPE (DETACHED) | 275000 | 0 | Elmwood Bidwell | (42.9194355694349, -78.87709280994117) |
| 482 | DOWNTOWN ROW TYPE (DETACHED) | 34500 | 0 | Elmwood Bidwell | (42.920238159038476, -78.85944723608432) |
| 210 | ONE FAMILY DWELLING | 210000 | 2106 | Elmwood Bidwell | (42.918710564992416, -78.87441389506147) |
| 482 | DOWNTOWN ROW TYPE (DETACHED) | 407500 | 0 | Elmwood Bidwell | (42.91772431790134, -78.87710775061227) |
| 220 | TWO FAMILY DWELLING | 110000 | 2076 | Elmwood Bidwell | (42.9198347309033, -78.87709262249685) |
# 2017 - 2018 Single Family Housing Price Histogram
# The bar colour is set directly on the geom: no fill aesthetic is mapped,
# so a manual fill scale would have nothing to apply to.
# bins = 30 is ggplot2's default, stated explicitly to silence the
# "pick a better binwidth" message.
Plot_2017 <- ggplot(data = Buffalo_17, mapping = aes(x = TOTAL.VALUE)) +
  geom_histogram(bins = 30, fill = "lightblue") +
  theme_few() +
  labs(
    x = "Total Value($)",
    y = "Count",
    title = "Distribution of Buffalo Home Prices",
    subtitle = "Single Family Property Prices (2017 - 2018)",
    caption = "Source: Buffalo Open Data"
  )
print(Plot_2017)

# 2019 - 2020 Single Family Housing Price Histogram
# The bar colour is set directly on the geom: no fill aesthetic is mapped,
# so a manual fill scale would have nothing to apply to.
# bins = 30 is ggplot2's default, stated explicitly to silence the
# "pick a better binwidth" message.
Plot_2019 <- ggplot(data = Buffalo_20, mapping = aes(x = TOTAL.VALUE)) +
  geom_histogram(bins = 30, fill = "lightblue") +
  theme_few() +
  labs(
    x = "Total Value($)",
    y = "Count",
    title = "Distribution of Buffalo Home Prices",
    subtitle = "Single Family Property Prices (2019 - 2020)",
    caption = "Source: Buffalo Open Data"
  )
print(Plot_2019)

# Bounding box of the neighborhood boundaries, taken from the sp object
Buffalo_bbox <- sp::bbox(Buffalo_sp)

# Fetch the "toner-lite" basemap tiles covering that bounding box
basemap <- get_stamenmap(bbox = Buffalo_bbox, maptype = "toner-lite", zoom = 13)

# Render the bare basemap with a title
BFMap <- ggmap(basemap) + ggtitle("Buffalo Basemap")
print(BFMap)

# Point map of the 2017 - 2018 single-family parcels drawn over the basemap.
# Each point is coloured by TOTAL.VALUE on a log-transformed green gradient.
SingleFam17 <- ggmap(basemap) +
  geom_point(
    mapping = aes(x = LONGITUDE, y = LATITUDE, colour = TOTAL.VALUE),
    data = Buffalo_17,
    alpha = 0.7,
    size = .025
  ) +
  scale_colour_gradient(
    name = "Single Family Home Price",
    low = "light green",
    high = "dark green",
    trans = "log",
    labels = scales::dollar_format(prefix = "$")
  ) +
  ggtitle(
    "Distribution of Buffalo Home Prices",
    subtitle = "Property Prices (2017 - 2018)"
  ) +
  labs(caption = "Open Data Buffalo")
print(SingleFam17)

# Point map of the 2019 - 2020 single-family parcels drawn over the basemap.
# Each point is coloured by TOTAL.VALUE on a log-transformed green gradient.
SingleFam20 <- ggmap(basemap) +
  geom_point(
    mapping = aes(x = LONGITUDE, y = LATITUDE, colour = TOTAL.VALUE),
    data = Buffalo_20,
    alpha = 0.7,
    size = .025
  ) +
  scale_colour_gradient(
    name = "Single Family Home Price",
    low = "light green",
    high = "dark green",
    trans = "log",
    labels = scales::dollar_format(prefix = "$")
  ) +
  ggtitle(
    "Distribution of Buffalo Home Prices",
    subtitle = "Property Prices (2019 - 2020)"
  ) +
  labs(caption = "Open Data Buffalo")
print(SingleFam20)

# Color Palette
# Domain-free palette, kept under its original name for any other code
# that refers to it.
pallete <- colorNumeric("viridis", NULL)

# Circles are coloured by log(TOTAL.VALUE). A palette with no fixed domain
# rescales on every call, so the two years and the legend would each get
# their own colour scale. Build one palette over the pooled log values so
# a given colour means the same price in both layers and in the legend.
log_value_all <- log(c(Buffalo_17$TOTAL.VALUE, Buffalo_20$TOTAL.VALUE))
# Drop NA / -Inf (e.g. from zero values) before taking the range.
log_value_all <- log_value_all[is.finite(log_value_all)]
log_value_pal <- colorNumeric("viridis", domain = range(log_value_all))

# Layer group names, defined once so the layer control toggles exactly the
# groups that the circle layers are registered under.
year_groups <- c("2017 - 2018", "2019 - 2020")

Neighborhood_map <- leaflet() %>%
  setMaxBounds(lng1 = -78.91246, lat1 = 42.82603, lng2 = -78.79504, lat2 = 42.96641) %>%
  addProviderTiles("CartoDB") %>%
  addProviderTiles("Stamen.TonerLines",
                   options = providerTileOptions(opacity = 0.35)) %>%
  addCircles(data = Buffalo_17, lng = ~LONGITUDE, lat = ~LATITUDE,
             color = ~log_value_pal(log(TOTAL.VALUE)),
             radius = .05, opacity = 0.5,
             group = year_groups[1]) %>%
  addCircles(data = Buffalo_20, lng = ~LONGITUDE, lat = ~LATITUDE,
             color = ~log_value_pal(log(TOTAL.VALUE)),
             radius = .05, opacity = 0.5,
             group = year_groups[2]) %>%
  addPolygons(data = Buffalo_sp, fillColor = "transparent", color = "#444444", weight = 2) %>%
  addLayersControl(overlayGroups = year_groups) %>%
  # The legend is drawn on the same log scale as the circles; the label
  # transform converts the tick values back to dollars for display.
  addLegend(position = "bottomleft", pal = log_value_pal, values = log_value_all,
            labFormat = labelFormat(prefix = "$",
                                    transform = function(x) round(exp(x))),
            title = "Single Family Home Value")
Neighborhood_map
# Distribution of Single Family Homes by Year Built
# The bar colour is set directly on the geom: no fill aesthetic is mapped,
# so a manual fill scale would have nothing to apply to.
# bins = 30 is ggplot2's default, stated explicitly to silence the
# "pick a better binwidth" message.
Year_built <- ggplot(data = Buffalo_20, mapping = aes(x = YEAR.BUILT)) +
  geom_histogram(bins = 30, fill = "lightblue") +
  theme_few() +
  labs(
    x = "Year Built",
    y = "Number of Homes",
    title = "Distribution of Single Family Homes by Year Built",
    caption = "Source: Buffalo Open Data"
  )
Year_built

# Scatter plot of total value against total living area for the
# 2019 - 2020 single-family parcels.
live_price20 <- ggplot(Buffalo_20, aes(TOTAL.LIVING.AREA, TOTAL.VALUE)) +
  geom_point() +
  xlab("Total Living Area (sqft)") +
  ylab("Total Value Single Family Home") +
  ggtitle("Price by Square ft of Living Space")
print(live_price20)

# Price by Bedrooms 2019 - 2020
# geom_col() on the raw parcels stacks every home's TOTAL.VALUE, so each bar
# would show the summed value of all homes with that bedroom count (driven
# mostly by how many such homes exist). Summarise to the median value per
# bedroom count first so the bars show a typical price.
# dplyr:: is spelled out because plyr is attached after dplyr in this file
# and its summarise() ignores grouping.
bed_price_summary <- Buffalo_20 %>%
  dplyr::group_by(X..OF.BEDS) %>%
  dplyr::summarise(MEDIAN.VALUE = median(TOTAL.VALUE, na.rm = TRUE)) %>%
  dplyr::ungroup()

bed_price <- ggplot(data = bed_price_summary, aes(x = X..OF.BEDS, y = MEDIAN.VALUE)) +
  labs(x = "Number of Bedrooms", y = "Median Total Value Single Family Home",
       title = "Median Price by Number of Bedrooms") +
  geom_col()
bed_price

## Transform Data for Regression
# The response is the base-10 log of total value, stored in a new column.
Buffalo_20$log <- log10(Buffalo_20$TOTAL.VALUE)
# View() opens a data viewer, so only call it in an interactive session;
# it has no place in a knitted / batch run.
if (interactive()) {
  View(Buffalo_20)
}
## Multiple Linear Regression
# Same six predictors as before; the stray doubled "+" in the formula was
# removed, which does not change the fitted model.
# NOTE(review): PROPERTY.CLASS and BASEMENT.TYPE look like categorical codes
# but appear to be fitted as single numeric terms -- confirm whether they
# should be wrapped in factor().
MLR <- lm(log ~ X..OF.BATHS + X..OF.BEDS +
            YEAR.BUILT + TOTAL.LIVING.AREA + PROPERTY.CLASS + BASEMENT.TYPE,
          data = Buffalo_20)
summary(MLR)
##
## Call:
## lm(formula = log ~ X..OF.BATHS + X..OF.BEDS + +YEAR.BUILT + TOTAL.LIVING.AREA +
## PROPERTY.CLASS + BASEMENT.TYPE, data = Buffalo_20)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.34156 -0.12204 0.00818 0.15185 1.12611
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -8.373e+00 4.388e+00 -1.908 0.0564 .
## X..OF.BATHS 7.396e-02 3.214e-03 23.008 <2e-16 ***
## X..OF.BEDS -5.799e-02 1.682e-03 -34.470 <2e-16 ***
## YEAR.BUILT 3.541e-03 4.767e-05 74.287 <2e-16 ***
## TOTAL.LIVING.AREA 3.244e-04 2.837e-06 114.327 <2e-16 ***
## PROPERTY.CLASS 2.651e-02 2.089e-02 1.269 0.2045
## BASEMENT.TYPE 8.841e-02 1.587e-03 55.724 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2335 on 37795 degrees of freedom
## (62 observations deleted due to missingness)
## Multiple R-squared: 0.5164, Adjusted R-squared: 0.5163
## F-statistic: 6727 on 6 and 37795 DF, p-value: < 2.2e-16
All sources are cited in a consistent manner